#!/bin/bash

# tidy_subdirs - Tidy up subdirectories matching a regexp

# Copyright (c) 2003 IBM, Martin Schwenke <martins@au.ibm.com>

# This file is part of the Linux lsvpd package.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
    
# $Id: tidy_subdirs,v 1.1 2006/04/11 18:38:28 emunson Exp $

# FIXME: Only handles a single pattern right now.  Multiple patterns
# look like they would introduce shell quoting problems.

######################################################################

# usage
#
# Write the command synopsis to stderr, then terminate the script with
# exit status 1.  Never returns to the caller.
usage ()
{
    cat >&2 <<EOF
usage: $0 directory pattern

pattern   is a regexp (probably protected from the shell)

For example, '$0 /tmp "^yip-"' causes subdirectories
called, say, "/tmp/yip-1", "/tmp/yip-hello", etc. to be tidied.
EOF

    exit 1
}

# Exactly two arguments are required: the directory and the pattern.
if [ $# -ne 2 ] ; then
    usage
fi

dir="$1"
pattern="$2"

# The first argument must name an existing directory.
if ! [ -d "$dir" ] ; then
    echo "No such directory \"$1\"" 1>&2
    usage
fi

######################################################################

# dirs_same DIR1 DIR2
#
# Succeed (exit status 0) if a recursive diff finds no differences
# between DIR1 and DIR2.  Any other outcome - the trees differ, or diff
# could not compare them - yields a non-zero status.  All diff output is
# discarded; only the exit status is meaningful.
function dirs_same ()
{
    # -q: we only want the verdict, so don't generate the actual diffs.
    # --: a directory name starting with "-" must not be taken as an option.
    diff -rq -- "$1" "$2" >/dev/null 2>&1
}

# do_group FIRST OLDEST LINKABLE
#
# Process one group of identical directories.  FIRST is executed as a
# command (the caller passes "true" or "false"): when it succeeds the
# group is handed to link_group, otherwise to compress_group.  OLDEST and
# LINKABLE are passed through unchanged, and the status of whichever
# function ran is returned.
do_group ()
{
    local is_first="$1"
    local keep="$2"
    local duplicates="$3"

    if ! "$is_first" ; then
	compress_group "$keep" "$duplicates"
	return
    fi

    link_group "$keep" "$duplicates"
}

# link_group OLDEST LINKABLE
#
# Replace each directory named in LINKABLE (a whitespace-separated list
# of names in the current directory) by a symlink pointing to OLDEST.
#
# A directory that cannot be replaced is left exactly as it was.
# Returns 0 if every directory was replaced, 1 if any had to be skipped.
function link_group ()
{
    local oldest="$1"
    local linkable="$2"

    local i
    local tmp
    local rc=0

    # $linkable is deliberately unquoted: it is split into separate names.
    for i in $linkable ; do
	# This is closer to foolproof than the alternatives if we get killed.
	tmp=".tmp$$.${i}"
	rm -rf -- "$tmp"  # This should be a noop.

	# If the directory can't be moved aside it is still in place, and
	# ln would then create the link *inside* it.  Leave it alone.
	if ! mv -- "$i" "$tmp" ; then
	    rc=1
	    continue
	fi

	# Only discard the contents once the replacement link exists;
	# otherwise put the directory back where it was.
	if ln -s -- "$oldest" "$i" ; then
	    rm -rf -- "$tmp"
	else
	    mv -- "$tmp" "$i"
	    rc=1
	fi
    done

    return $rc
}

# Below, we go to great lengths to avoid dangling symlinks and other
# breakage.  It is still possible that someone will try to process a
# directory while it is being removed, causing partial or
# unpredictable results.  For this reason, each directory is moved
# aside to a temporary location before it is removed.
#
# compress_group OLDEST LINKABLE
#
# Archive directory OLDEST as OLDEST.tar.gz in the current directory,
# then:
#   - for every symlink here whose name matches the global $pattern and
#     whose target is OLDEST, create NAME.tar.gz -> OLDEST.tar.gz and
#     remove the old link;
#   - replace every directory named in LINKABLE (a whitespace-separated
#     list) by a symlink NAME.tar.gz -> OLDEST.tar.gz;
#   - remove OLDEST itself.
#
# Returns 1, without removing any directory or link, if the archive
# cannot be created.
function compress_group ()
{
    local oldest="$1"
    local linkable="$2"

    local i
    local l
    local tmp

    # Do this first so we don't get stale links if killed.  Everything
    # below deletes data that afterwards exists only in the archive, so
    # give up (discarding any partial archive) if tar fails.
    if ! tar czf "${oldest}.tar.gz" "$oldest" ; then
	rm -f "${oldest}.tar.gz"
	return 1
    fi

    # Copy symlinks that point to this directory, so they point to the
    # .tar.gz equivalent.  This is very inefficient, and could be done
    # in one pass at the end, but then there would be dangling
    # symlinks between now and then.  However, if this script is run
    # often enough, this function will be called relatively rarely,
    # since there probably won't be a lot of directories.
    find . -maxdepth 1 -mindepth 1 -type l | \
    sed -e 's/^\.\///' | \
    grep -e "$pattern" | \
    while read -r i ; do
	l=$(readlink "$i")
	if [ "$l" = "$oldest" ] ; then
	    ln -s "${oldest}.tar.gz" "${i}.tar.gz"
	    # Pass the name downstream so the old link gets removed.
	    echo "$i"
	fi
    done | \
    {
	# Create new symlinks to replace directories, as per $linkable arg.
	# $linkable is deliberately unquoted: it is split into names.
	for i in $linkable ; do
	    ln -s "${oldest}.tar.gz" "${i}.tar.gz"
	    tmp=".tmp$$.${i}"
	    mv "$i" "$tmp"
	    rm -rf "$tmp"
	done

	# Remove the old, soon-to-be dangling links - they're on stdin.
	while read -r i ; do
	    rm -f "$i"
	done
    }

    # Finally remove the archived directory itself, via a temporary name.
    tmp=".tmp$$.${oldest}"
    mv "$oldest" "$tmp"
    rm -rf "$tmp"
}

######################################################################

# Everything below links, archives and deletes directories relative to
# the current directory, so it must not run anywhere but in $dir.
cd "$dir" || exit 1

# Walk along the matching directories, from newest to oldest finding
# groups of identical directories.  When we find a different one we
# process the group of identical directories, replacing the newer ones
# by symlinks to the oldest.  Actually, we only do that for the 1st
# group.  For subsequent groups we .tar.gz the oldest directory and
# create symlinks to it to replace the newer directories.

# "true" until the first group has been handed to do_group.
first_group=true
# Space-separated names of the newer members of the current group.
same_linkable=""

# Most recently read directory, i.e. the oldest one seen so far.
same_oldest=""

# List directories only ('^d'), sorted by time, and strip the
# long-listing columns in front of each name.  Names containing
# whitespace are not supported: $same_linkable is a space-separated list.
ls -lt --full-time | \
grep '^d' | \
sed -e 's/.*\+[0-9]* //' | \
grep -e "$pattern" | \
{
    while read -r i ; do
	if [ -n "$same_oldest" ] ; then
	    if dirs_same "$i" "$same_oldest" ; then
		# Still in the same group: the previous directory is
		# newer than $i, so it can be replaced by a link.
		same_linkable="${same_linkable} ${same_oldest}"
	    else
		# $i starts a new group; finish off the previous one.
		do_group "$first_group" "$same_oldest" "$same_linkable"
		first_group=false
		same_linkable=""
	    fi
	fi
	same_oldest="$i"
    done

    # This has to be in the same subshell as the above loop, since it
    # is at the end of a pipeline, otherwise changes to variables from
    # within the loop will not be available here.
    if [ -n "$same_oldest" ] ; then
	do_group "$first_group" "$same_oldest" "$same_linkable"
    fi
}
